#!/usr/bin/env bash
#
# Launch `vllm serve` for the AWQ-quantized Qwen3-235B-A22B weights stored at
# /home/weights/Qwen3-235B-A22B-AWQ, exposed under the model name
# "qwen3-235-awq" on 0.0.0.0:8000.

# Generous timeouts so long-running requests are not aborted.
# Per the vLLM environment-variable docs the first value is in seconds and the
# second in milliseconds, so both amount to 10 hours.
export VLLM_ENGINE_ITERATION_TIMEOUT_S=36000
export VLLM_RPC_TIMEOUT=36000000
# NOTE(review): VLLM_ENFORCE_CUDA_GRAPH does not appear to be an upstream vLLM
# variable; presumably it is read by the locally installed build -- confirm.
export VLLM_ENFORCE_CUDA_GRAPH=0

# Context window in tokens: 128 * 1024 = 131072. This equals the YaRN settings
# below (factor 4.0 * original_max_position_embeddings 32768).
max_model_len=$((128 * 1024))

# Arguments are collected in an array so each group can carry its own comment.
serve_args=(
  --served-model-name "qwen3-235-awq"

  # Tensor-parallel size 4 and pipeline-parallel size 2.
  -tp 4 -pp 2

  --max-model-len "${max_model_len}"
  --max-num-batched-tokens 2048

  # YaRN RoPE scaling to extend the 32768-token base context by a factor of 4.
  --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}'

  # NOTE(review): newer vLLM releases reportedly deprecate --enable-reasoning
  # in favour of --reasoning-parser alone; kept so older versions still work.
  # Confirm against the installed version before removing.
  --enable-reasoning --reasoning-parser qwen3

  --enable-prefix-caching

  # Silence periodic stats and per-request logging.
  --disable-log-stats --disable-log-requests

  # Listen on all interfaces.
  --host 0.0.0.0 --port 8000
)

vllm serve /home/weights/Qwen3-235B-A22B-AWQ "${serve_args[@]}"


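# Alternative launch (disabled): identical to the active command above except
# that it uses -pp 4 instead of -pp 2.
# vllm serve /home/weights/Qwen3-235B-A22B-AWQ \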
# --served_model_name "qwen3-235-awq" \
# -tp 4 -pp 4 \
# --max-model-len $[128*1024] --max-num-batched-tokens 2048 \
# --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' \
# --enable-reasoning --reasoning-parser qwen3 \
# --enable-prefix-caching \
# --disable_log_stats --disable_log_requests \
# --host 0.0.0.0 --port 8000